
library(readxl)
# Read the project workbook into `homicide`. No `sheet` is given, so
# read_excel() falls back to its default sheet.
homicide <- read_excel(
  path = "~/Documents/missouri_project/data/Homicide_Cov_Bor_1.xlsx"
)


library(dplyr)
library(car)

# Keep Missouri plus the eight other states used in the analysis.
# `%in%` replaces the chain of `==` / `|` tests; rows whose state is NA
# are dropped either way.
data_homicide <- homicide %>%
  filter(
    state %in% c(
      "Missouri", "Colorado", "Georgia", "Illinois", "Kentucky",
      "Texas", "South Carolina", "Ohio", "Indiana"
    )
  )


# Drop every 2007 observation.
# NOTE(review): 2007 appears to be excluded as the transition year between
# the 2006 baseline and the 2008+ period flagged below -- confirm.
data_homicide <- filter(data_homicide, year != 2007)

# Indicator column: 1 for Missouri rows with year >= 2008, 0 for every other
# row. `missing = 0` keeps rows where the comparison is NA at 0, matching the
# earlier which()-based assignment, and the column stays a double.
data_homicide <- data_homicide %>%
  mutate(
    MO_2008ge = if_else(year >= 2008 & state == "Missouri", 1, 0, missing = 0)
  )

# Second data frame restricted to years up to 2013 (i.e. 2006 and 2008-2013,
# since 2007 has already been removed). This must run while `year` is still
# numeric: the `<=` comparison is not meaningful once year becomes a factor.
data_homicide2 <- filter(data_homicide, year <= 2013)

# Convert year and state to factors in both data frames so that lm() expands
# them into one dummy column per level instead of treating year as a numeric
# trend.
data_homicide <- data_homicide %>%
  mutate(
    year = as.factor(year),
    state = as.factor(state)
  )

data_homicide2 <- data_homicide2 %>%
  mutate(
    year = as.factor(year),
    state = as.factor(state)
  )

### Fit a linear model on the 2006 and 2008-2019 data
# State and year enter as factors and the intercept is removed (`- 1`);
# MO_2008ge is the 0/1 indicator for Missouri rows from 2008 onward.
fit1 <- lm(rate ~ state + year + MO_2008ge - 1, data = data_homicide)
summary(fit1)
Anova(fit1)

### 95% confidence interval for the MO_2008ge coefficient
# The estimate and standard error are read from the fitted model instead of
# being typed in from a printed summary, so the interval cannot go stale when
# the data or the model change. The normal-approximation form
# (estimate +/- z * SE) is kept. Note that this uses the ordinary lm()
# standard error, not the cluster-robust one computed below.
mo_coef <- coef(summary(fit1))["MO_2008ge", ]
mo_coef[["Estimate"]] + c(-1, 1) * qnorm(0.975) * mo_coef[["Std. Error"]]

# clubSandwich is attached here and is also needed by the later vcovCR() call.
library(clubSandwich)

# CR2 cluster-robust covariance matrix of the coefficients, clustering on state
vcovCR(fit1, cluster = data_homicide$state, type = "CR2")

### Same specification as above, fitted on the 2006 and 2008-2013 subset
fit2 <- lm(
  formula = rate ~ state + year + MO_2008ge - 1,
  data = data_homicide2
)
summary(fit2)
Anova(fit2)

# CR2 cluster-robust covariance matrix of the coefficients, clustering on state
vcovCR(fit2, cluster = data_homicide2$state, type = "CR2")




